The purpose of this notebook is to ensure that the Richter-like cells predicted at the initial time points of the disease do not correspond to doublets. To that end, we will merge the RT-CLL predicted doublets detected by cell hashing with the filtered Seurat objects. This ground-truth doublets should cluster at the interface between CLL and RT clusters, while the RT-like cells should be embedded in the clusters.
library(Seurat)
library(ggpubr)
library(ggridges)
library(tidyverse)
path_to_obj <- here::here("results/R_objects/patient_63/7.seurat_list_annotated_reprocessed.rds")
path_to_demultiplex <- here::here("results/R_objects/demultiplexed")
path_to_libraries_metadata <- here::here("1-cellranger_mapping/data/richter_metadata.csv")
path_to_sample_metdata <- here::here("data/sample_metadata_FN.csv")
path_to_richter_signature <- here::here("results/R_objects/richter_signatures_list.rds")
path_to_cll_signature <- here::here("results/R_objects/cll_signatures_list.rds")
# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
"blue", "mediumorchid2", "coral2", "blueviolet",
"indianred4", "deepskyblue1", "dimgray", "deeppink1",
"green", "lightgray", "hotpink1")
# Source functions
source(here::here("bin/utils.R"))
seurat_list <- readRDS(path_to_obj)
seurat_list$`12`$annotation_final <- Idents(seurat_list$`12`)
seurat_list$`19`$annotation_final <- Idents(seurat_list$`19`)
seurat_list$`3299`$annotation_final <- Idents(seurat_list$`3299`)
seurat_list$`3299`$is_richter <- ifelse(
seurat_list$`3299`$time_point %in% c("T2", "T3"),
TRUE,
FALSE
)
seurat_list
## $`12`
## An object of class Seurat
## 23403 features across 5913 samples within 1 assay
## Active assay: RNA (23403 features, 2000 variable features)
## 4 dimensional reductions calculated: pca, harmony, umap, umap_3D
##
## $`19`
## An object of class Seurat
## 23403 features across 7742 samples within 1 assay
## Active assay: RNA (23403 features, 2000 variable features)
## 4 dimensional reductions calculated: pca, harmony, umap, umap_3D
##
## $`3299`
## An object of class Seurat
## 23403 features across 6037 samples within 1 assay
## Active assay: RNA (23403 features, 2000 variable features)
## 4 dimensional reductions calculated: pca, harmony, umap, umap_3D
umaps_annotation <- purrr::map(
seurat_list,
DimPlot,
cols = color_palette,
pt.size = 0.5,
group.by = "annotation_final"
)
umaps_annotation
## $`12`
##
## $`19`
##
## $`3299`
# Load demultiplexed files (with doublets)
files_to_load <- list.files(path_to_demultiplex, full.names = TRUE)
gem_ids <- list.files(path_to_demultiplex, full.names = FALSE) %>%
str_remove("^seurat_") %>%
str_remove("_demultiplexed.rds$")
names(files_to_load) <- gem_ids
seurat_demultiplexed <- purrr::map(files_to_load, readRDS)
# Load metadata files
libraries_metadata <- read_csv(path_to_libraries_metadata)
samples_metadata <- read_csv(path_to_sample_metdata)
DT::datatable(libraries_metadata)
DT::datatable(samples_metadata)
# Load signatures
richter_signatures <- readRDS(path_to_richter_signature)
cll_signatures <- readRDS(path_to_cll_signature)
seurat_demultiplexed_12 <- merge(
x = seurat_demultiplexed$r65huxjg_g0itbe2k,
y = seurat_demultiplexed$bco1we6n_qaz6s7wd
)
seurat_demultiplexed_19 <- merge(
x = seurat_demultiplexed$te04uhdq_2uatjn5y,
y = seurat_demultiplexed$j7z61du3_vyxon6q4
)
seurat_demultiplexed_3299 <- merge(
x = seurat_demultiplexed$p092mcjf_6w9km8re,
y = seurat_demultiplexed$z97dxyfq_xi38kvhc
)
seurat_demultiplexed <- list(
"12" = seurat_demultiplexed_12,
"19" = seurat_demultiplexed_19,
"3299" = seurat_demultiplexed_3299
)
rm(seurat_demultiplexed_12)
rm(seurat_demultiplexed_19)
rm(seurat_demultiplexed_3299)
richter_samples <- c(
"ICGC-365-15-194",
"ICGC-01918-3879A",
"ICGC-012-19-4600B",
"ICGC-3299-17-251",
"ICGC-3299-18-218A",
"ICGC-3299-18-5761A"
)
richter_annotations <- c(
"CCND2hi Richter-like quiescent",
"CCND2lo Richter-like quiescent",
"Richter-like proliferative",
"TP53INP1hi Richter-like quiescent",
"MIR155HGhi CD20hi Richter-like quiescent",
"Richter-like proliferative",
"Richter-like"
)
seurat_list <- purrr::map(seurat_list, function(seurat_obj) {
seurat_obj$annotation_final <- as.character(seurat_obj$annotation_final)
seurat_obj
})
seurat_demultiplexed2 <- purrr::map(names(seurat_list), function(x) {
rt_cll_doublets <-
(seurat_demultiplexed[[x]]$HTO_maxID %in% richter_samples |
seurat_demultiplexed[[x]]$HTO_secondID %in% richter_samples) &
seurat_demultiplexed[[x]]$HTO_classification.global == "Doublet"
rt_cll_doublets <- colnames(seurat_demultiplexed[[x]])[rt_cll_doublets]
rt_seed_cells <-
seurat_list[[x]]$is_richter == FALSE &
seurat_list[[x]]$annotation_final %in% richter_annotations
rt_seed_cells <- colnames(seurat_list[[x]])[rt_seed_cells]
background_cells <- !(colnames(seurat_list[[x]]) %in% rt_seed_cells)
background_cells <- colnames(seurat_list[[x]])[background_cells]
selected_cells <- Reduce(
union,
list(rt_cll_doublets, rt_seed_cells, background_cells)
)
seurat_obj <- subset(seurat_demultiplexed[[x]], cells = selected_cells)
seurat_obj$gem_id <- str_sub(colnames(seurat_obj), start = 1, end = 17)
seurat_obj$classification <- case_when(
colnames(seurat_obj) %in% rt_cll_doublets ~ "RT-CLL doublets",
colnames(seurat_obj) %in% rt_seed_cells ~ "RT seeds",
colnames(seurat_obj) %in% background_cells ~ "Background",
)
seurat_obj$annotation_final <- "unannotated"
seurat_obj$annotation_final[background_cells] <- seurat_list[[x]]$annotation_final[background_cells]
seurat_obj$annotation_final[rt_seed_cells] <- seurat_list[[x]]$annotation_final[rt_seed_cells]
seurat_obj$annotation_final[rt_cll_doublets] <- "RT-CLL doublets"
seurat_obj$donor_id <- x
Idents(seurat_obj) <- "annotation_final"
seurat_obj
})
names(seurat_demultiplexed2) <- names(seurat_list)
rm(seurat_demultiplexed)
seurat_demultiplexed2 <- purrr::map(seurat_demultiplexed2, NormalizeData)
seurat_demultiplexed2 <- purrr::map(
seurat_demultiplexed2,
process_seurat,
dims = 1:20
)
purrr::map(seurat_demultiplexed2, DimPlot, pt.size = 0.5, cols = color_palette)
## $`12`
##
## $`19`
##
## $`3299`
First, we will assess whether Richter transformation (RT) “seed” cells colocalize with doublets:
# Split UMAP by background, "seed cells" and RT-CLL doublets
umap_doublets <- purrr::map(
seurat_demultiplexed2,
plot_split_umap,
var = "classification"
)
umap_doublets
## $`12`
##
## $`19`
##
## $`3299`
Next, we seek to elucidate the HTO of the RT samples in seed cells. RT seed cells which, by definition, are classified as singlets of a non-RT time-point. Thus, if they are not doublets they should not have a significantly higher expression of the RT HTO:
# Patients 12 and 19
# Violin plots
violin_plots_12_19 <- purrr::map(
seurat_demultiplexed2[c("12", "19")],
function(seurat_obj) {
DefaultAssay(seurat_obj) <- "HTO"
selected_features <- rownames(seurat_obj)[rownames(seurat_obj) %in% richter_samples]
seurat_obj <- subset(seurat_obj, features = selected_features)
seurat_obj$richter_hto <- seurat_obj[["HTO"]]@data[1, ]
p <- seurat_obj@meta.data %>%
ggplot(aes(classification, richter_hto, fill = classification)) +
geom_violin() +
facet_wrap(~gem_id) +
labs(x = "", y = "RT HTO normalized expression", fill = "") +
theme_bw() +
theme(legend.position = "none")
p
})
violin_plots_12_19
## $`12`
##
## $`19`
# Ridge plots
density_plots_12_19 <- purrr::map(
seurat_demultiplexed2[c("12", "19")],
function(seurat_obj) {
DefaultAssay(seurat_obj) <- "HTO"
selected_features <- rownames(seurat_obj)[rownames(seurat_obj) %in% richter_samples]
seurat_obj <- subset(seurat_obj, features = selected_features)
seurat_obj$richter_hto <- seurat_obj[["HTO"]]@data[1, ]
p <- seurat_obj@meta.data %>%
ggplot(aes(x = richter_hto, y = classification, fill = classification)) +
geom_density_ridges() +
facet_wrap(~gem_id) +
labs(x = "RT HTO normalized expression", y = "", fill = "") +
theme_bw() +
theme(legend.position = "none")
p
})
density_plots_12_19
## $`12`
##
## $`19`
Finally, we will score each cell with a RT or CLL-specific signature, which we calculated in another notebook. If the RT seed cells are truly Richter-like, they should have a high RT signature and low CLL signature. On the other hand, if they are RT-CLL doublets, they will have an intermediate signature in both:
# Calculate signatures
patients <- names(seurat_demultiplexed2)
seurat_demultiplexed2 <- purrr::map(patients, function(x) {
seurat_obj <- seurat_demultiplexed2[[x]]
DefaultAssay(seurat_obj) <- "RNA"
seurat_obj <- AddModuleScore(
seurat_obj,
features = list(
cll_signature = cll_signatures[[x]],
richter_signature = richter_signatures[[x]]
),
name = c("cll_signature", "richter_signature")
)
seurat_obj
})
names(seurat_demultiplexed2) <- patients
# Plot
violin_plots_signatures <- purrr::map(
seurat_demultiplexed2,
function(seurat_obj) {
cll_p <- seurat_obj@meta.data %>%
ggplot(aes(classification, cll_signature1, fill = classification)) +
geom_violin() +
labs(x = "", y = "CLL signature score") +
theme_bw() +
theme(legend.position = "none")
richter_p <- seurat_obj@meta.data %>%
ggplot(aes(classification, richter_signature2, fill = classification)) +
geom_violin() +
labs(x = "", y = "Richter signature score") +
theme_bw() +
theme(legend.position = "none")
output <- ggarrange(plotlist = list(cll_p, richter_p), ncol = 2)
output
})
violin_plots_signatures
## $`12`
##
## $`19`
##
## $`3299`
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.6 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2 tidyverse_1.3.0 ggridges_0.5.3 ggpubr_0.4.0 ggplot2_3.3.3 SeuratObject_4.0.1 Seurat_4.0.1 BiocStyle_2.18.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 plyr_1.8.6 igraph_1.2.6 lazyeval_0.2.2 splines_4.0.4 crosstalk_1.1.1 listenv_0.8.0 scattermore_0.7 digest_0.6.27 htmltools_0.5.1.1 fansi_0.4.2 magrittr_2.0.1 tensor_1.5 cluster_2.1.1 ROCR_1.0-11 openxlsx_4.2.3 globals_0.14.0 modelr_0.1.8 matrixStats_0.58.0 spatstat.sparse_2.0-0 colorspace_2.0-1 rvest_1.0.0 ggrepel_0.9.1 haven_2.3.1 xfun_0.22 crayon_1.4.1 jsonlite_1.7.2 spatstat.data_2.1-0 survival_3.2-10 zoo_1.8-9 glue_1.4.2 polyclip_1.10-0 gtable_0.3.0 leiden_0.3.7 car_3.0-10 future.apply_1.7.0 abind_1.4-5 scales_1.1.1 DBI_1.1.1 rstatix_0.7.0 miniUI_0.1.1.1 Rcpp_1.0.6 viridisLite_0.4.0 xtable_1.8-4 reticulate_1.20 spatstat.core_2.1-2 foreign_0.8-81 DT_0.17 htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.2 ica_1.0-2
## [55] farver_2.1.0 pkgconfig_2.0.3 sass_0.4.0 uwot_0.1.10 dbplyr_2.1.0 deldir_0.2-10 here_1.0.1 utf8_1.2.1 labeling_0.4.2 tidyselect_1.1.1 rlang_0.4.11 reshape2_1.4.4 later_1.2.0 munsell_0.5.0 cellranger_1.1.0 tools_4.0.4 cli_2.5.0 generics_0.1.0 broom_0.7.5 evaluate_0.14 fastmap_1.1.0 yaml_2.2.1 goftest_1.2-2 fs_1.5.0 knitr_1.31 fitdistrplus_1.1-3 zip_2.1.1 RANN_2.6.1 pbapply_1.4-3 future_1.21.0 nlme_3.1-152 mime_0.10 xml2_1.3.2 rstudioapi_0.13 compiler_4.0.4 plotly_4.9.3 curl_4.3.1 png_0.1-7 ggsignif_0.6.1 spatstat.utils_2.1-0 reprex_1.0.0 bslib_0.2.5 stringi_1.6.2 highr_0.8 RSpectra_0.16-0 lattice_0.20-41 Matrix_1.3-3 vctrs_0.3.8 pillar_1.6.1 lifecycle_1.0.0 BiocManager_1.30.10 spatstat.geom_2.1-0 lmtest_0.9-38 jquerylib_0.1.4
## [109] RcppAnnoy_0.0.18 data.table_1.14.0 cowplot_1.1.1 irlba_2.3.3 httpuv_1.6.1 patchwork_1.1.1 R6_2.5.0 bookdown_0.21 promises_1.2.0.1 KernSmooth_2.23-18 gridExtra_2.3 rio_0.5.26 parallelly_1.25.0 codetools_0.2-18 MASS_7.3-53.1 assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.2 sctransform_0.3.2 mgcv_1.8-34 parallel_4.0.4 hms_1.0.0 grid_4.0.4 rpart_4.1-15 rmarkdown_2.7 carData_3.0-4 Rtsne_0.15 shiny_1.6.0 lubridate_1.7.10